******************************************************************************
*Nonfarm payroll
******************************************************************************

*************************
* NFP surprises from Bloomberg
************************* 

* Load the "NFP" sheet starting at cell A6, using the first row read as
* variable names. allstring imports every column as text, so non-numeric
* placeholder cells do not break the import.
import excel "../data/nonfarm_payrolls/raw_data/bloomberg_nfp.xlsx", sheet("NFP") cellrange(A6) firstrow allstring clear

* Release date: parse the text field ECO_RELEASE_DT as a year-month-day date.
* (tostring is kept as a guard; with allstring the column is already a string.)
tostring ECO_RELEASE_DT, replace
gen date = date(ECO_RELEASE_DT, "YMD")
format %td date
order date
drop ECO_RELEASE_DT Dates

* Convert the remaining text columns to numbers. ignore() strips the listed
* characters first, so cells made up only of those characters become missing.
destring _all, replace ignore("#N/A N/A")
rename *, lower

* Surprise = actual release minus the survey median
gen nfp = actual_release - bn_survey_median
keep date nfp

* Only releases with a computable surprise are retained
keep if !missing(nfp)

* Standardize the surprise over the retained sample (the releases for which
* a forecast is available)
egen nfp_std = std(nfp)

* Stash the result for the final merge
tempfile bloomberg_surprises
save `bloomberg_surprises'
	
	
*************************
* NFP actuals from Philadelphia Fed
************************* 	

**** 1st step: Obtain the release dates
* The sheet is read as a grid: one row per Year, one column per month label.
import excel "../data/nonfarm_payrolls/raw_data/philadelphia_fed/Release_ Dates-Employment_Situation-BLS.xls", clear cellrange(A3:M50) firstrow

keep if !missing(Year)

* Give every month column the common stub "date" so reshape can stack them
local month_cols "Jan Feb March April May June July Aug Sept Oct Nov Dec"
foreach col of local month_cols {
	rename `col' date`col'
}

* Wide -> long: one row per (Year, month label); the label lands in month_string
reshape long date, i(Year) j(month_string) string
keep if !missing(date)

* Numeric month: position of the label in the list above (Jan = 1, ..., Dec = 12)
generate month = .
local m = 0
foreach col of local month_cols {
	local ++m
	replace month = `m' if month_string == "`col'"
}
sort Year month

* Monthly identifier used as the merge key in the next step
generate ym = ym(Year, month)
format %tm ym
order ym
drop Year month_string month

tempfile release_dates
save `release_dates'
	
**** 2nd step: Obtain the numbers
* Reads columns A:B of the DATA sheet (Date and First); First is kept as
* actual_value.
* NOTE(review): the file name suggests these are employment levels rather than
* monthly changes - confirm this is the intended series.
import excel "../data/nonfarm_payrolls/raw_data/philadelphia_fed/employ_level_first_second_third.xlsx", clear sheet("DATA") cellrange(A5:B704) firstrow

rename First actual_value

*Generate ym of which the release value refers to:
* Date is read as text with the year in characters 1-4 and the month starting
* at character 6 (presumably "YYYY:MM" - confirm against the sheet).
* substr(s, n1, n2) takes a start position and a LENGTH, so the two-digit
* month is substr(Date,6,2). The former substr(Date,6,7) asked for seven
* characters and only worked because the string ends after the month.
generate year = substr(Date,1,4)
generate month = substr(Date,6,2)
destring * , replace
generate ym = ym(year,month)
format ym %tm
order ym
drop Date year month

* Attach the release date of each reference month; keep(3) retains only months
* present in both datasets.
merge 1:1 ym using `release_dates', keep(3) nogen
sort date
drop ym

tempfile phily_fed_actuals
save `phily_fed_actuals'

	
*************************
* Surprises after 1997, actuals prior to that
*************************	
	
* Combine both sources on the release date; origin records where each row
* came from (1 = surprises only, 2 = actuals only, 3 = both).
use `bloomberg_surprises', clear
merge 1:1 date using `phily_fed_actuals', generate(origin)
sort date

* Restrict to 1 Jan 1989 onwards. The sample has to start before the first
* meeting in Jun 1989 so that an NFP value is available for it.
* NOTE: a missing date compares as larger than any number, so it is not
* removed by this condition.
drop if date < td(01jan1989)

* Standardize the actuals only over rows that have no surprise: blank the
* actual on matched rows first so they do not enter the mean and sd.
replace actual_value = . if origin == 3
egen actual_value_std = std(actual_value)
drop origin

* Where no standardized surprise exists, fall back to the standardized actual
replace nfp_std = actual_value_std if missing(nfp_std)

keep date nfp_std

compress
save "../data/nonfarm_payrolls/clean_data/nonfarm_payrolls.dta", replace